import scrapy
import logging
import time

# Module-level logger named after this module (standard logging convention).
logger = logging.getLogger(__name__)

class GiteeSpider(scrapy.Spider):
    """Scrape Python repository search results from Gitee.

    Walks the paginated search results, yielding one dict per repository
    with ``project`` (name) and ``introduction`` (description) keys.
    """

    name = 'gitee'                      # spider name
    allowed_domains = ['gitee.com']     # restrict crawling to gitee.com
    start_urls = ['https://search.gitee.com/?skin=rec&type=repository&q=python']  # initial URL

    def parse(self, response):
        """Extract repository entries from one results page and follow pagination.

        Args:
            response: the scrapy Response for a search-results page.

        Yields:
            dict: ``{'project': str, 'introduction': str}`` per result, plus a
            follow-up ``scrapy.Request`` for the next page while one exists.
        """
        div_list = response.xpath('//*[@id="hits-list"]/div')
        for div in div_list:
            item = {}
            item['project'] = div.xpath('./div[1]/div/a').xpath('string(.)').extract_first()
            item['introduction'] = div.xpath('./div[2]').xpath('string(.)').extract_first()
            # extract_first() returns None when the node is missing;
            # fall back to '' so .strip() cannot raise AttributeError.
            item['project'] = (item['project'] or '').strip()
            item['introduction'] = (item['introduction'] or '').strip()
            logger.warning(item)
            yield item

        prev_url = "https://search.gitee.com/"
        page = response.xpath('//*[@class="next"]/a/@href').extract_first()
        logger.debug("next page href: %s", page)  # was a bare print()
        # BUG FIX: the original condition was `page[-1:-3] != "101"`.
        # page[-1:-3] is an empty slice (start -1, stop -3, positive step),
        # so it compared "" != "101" and was ALWAYS true; the intended check
        # was the last three characters, i.e. page[-3:]. Also guard against
        # page being None on the final results page, which previously crashed
        # with a TypeError. Stop once the href points at page 101 (the search
        # results are capped at 100 pages).
        if page and page[-3:] != "101":
            next_url = prev_url + page
            yield scrapy.Request(
                url=next_url,
                callback=self.parse,
            )
